BSF Biomineralization Project

Cell cleavage

Set up workspace

Load libraries

library("ggplot2")
library("tidyverse")
library("betareg")
library("emmeans")
library("dplyr")

Import the data

raw_data <- read.csv("5-Developmental_progression_and_size/Input/cell_cleavage.csv" , header = TRUE, sep = ",")

Change sample_ID and Treatment to Factor variables

raw_data$sample_ID <- as.factor(raw_data$sample_ID)
str(raw_data$sample_ID)
##  Factor w/ 9 levels "144","145","146",..: 1 2 3 4 5 6 7 8 9
raw_data$treatment <- as.factor(raw_data$treatment)
str(raw_data$treatment)
##  Factor w/ 3 levels "AMB","l","xl": 1 1 2 3 3 2 1 2 3

Normalize the counts based on number of cells per sample

Calculate total cells per sample

total_counts_per_sample <- raw_data %>%
  mutate(total_counts_per_sample = One_cell+Two_cell+Four_cell+Greater_than_4_cell) %>% 
  arrange(treatment)

show(total_counts_per_sample)
##   sample_ID treatment One_cell Two_cell Four_cell Greater_than_4_cell
## 1       144       AMB       72       31        32                  18
## 2       145       AMB       47       35        37                  92
## 3       150       AMB      220        5         8                   4
## 4       146         l       80       85        65                  18
## 5       149         l       43       37         9                   2
## 6       151         l      124       85        58                  15
## 7       147        xl       18       17        34                  13
## 8       148        xl       98       58        14                   2
## 9       152        xl       91       95        41                  10
##   total_counts_per_sample
## 1                     153
## 2                     211
## 3                     237
## 4                     248
## 5                      91
## 6                     282
## 7                      82
## 8                     172
## 9                     237

Calculate % at each stage

normalized_cleavage_counts <- total_counts_per_sample %>%
  mutate(percent_one_cell = (One_cell/total_counts_per_sample)) %>% 
  mutate(percent_two_cell = (Two_cell/total_counts_per_sample)) %>%
  mutate(percent_four_cell = (Four_cell/total_counts_per_sample)) %>%
  mutate(percent_greater_than_four_cell = (Greater_than_4_cell/total_counts_per_sample))

show(normalized_cleavage_counts)
##   sample_ID treatment One_cell Two_cell Four_cell Greater_than_4_cell
## 1       144       AMB       72       31        32                  18
## 2       145       AMB       47       35        37                  92
## 3       150       AMB      220        5         8                   4
## 4       146         l       80       85        65                  18
## 5       149         l       43       37         9                   2
## 6       151         l      124       85        58                  15
## 7       147        xl       18       17        34                  13
## 8       148        xl       98       58        14                   2
## 9       152        xl       91       95        41                  10
##   total_counts_per_sample percent_one_cell percent_two_cell percent_four_cell
## 1                     153        0.4705882       0.20261438        0.20915033
## 2                     211        0.2227488       0.16587678        0.17535545
## 3                     237        0.9282700       0.02109705        0.03375527
## 4                     248        0.3225806       0.34274194        0.26209677
## 5                      91        0.4725275       0.40659341        0.09890110
## 6                     282        0.4397163       0.30141844        0.20567376
## 7                      82        0.2195122       0.20731707        0.41463415
## 8                     172        0.5697674       0.33720930        0.08139535
## 9                     237        0.3839662       0.40084388        0.17299578
##   percent_greater_than_four_cell
## 1                     0.11764706
## 2                     0.43601896
## 3                     0.01687764
## 4                     0.07258065
## 5                     0.02197802
## 6                     0.05319149
## 7                     0.15853659
## 8                     0.01162791
## 9                     0.04219409
str(normalized_cleavage_counts)
## 'data.frame':    9 obs. of  11 variables:
##  $ sample_ID                     : Factor w/ 9 levels "144","145","146",..: 1 2 7 3 6 8 4 5 9
##  $ treatment                     : Factor w/ 3 levels "AMB","l","xl": 1 1 1 2 2 2 3 3 3
##  $ One_cell                      : int  72 47 220 80 43 124 18 98 91
##  $ Two_cell                      : int  31 35 5 85 37 85 17 58 95
##  $ Four_cell                     : int  32 37 8 65 9 58 34 14 41
##  $ Greater_than_4_cell           : int  18 92 4 18 2 15 13 2 10
##  $ total_counts_per_sample       : int  153 211 237 248 91 282 82 172 237
##  $ percent_one_cell              : num  0.471 0.223 0.928 0.323 0.473 ...
##  $ percent_two_cell              : num  0.2026 0.1659 0.0211 0.3427 0.4066 ...
##  $ percent_four_cell             : num  0.2092 0.1754 0.0338 0.2621 0.0989 ...
##  $ percent_greater_than_four_cell: num  0.1176 0.436 0.0169 0.0726 0.022 ...
gathered_cleavage_counts <- normalized_cleavage_counts %>% 
  select(sample_ID, treatment, percent_one_cell:percent_greater_than_four_cell) %>% 
  gather(cleavage_stage, proportion, -sample_ID, -treatment, convert = FALSE, na.rm = TRUE, factor_key = TRUE)

show(gathered_cleavage_counts)
##    sample_ID treatment                 cleavage_stage proportion
## 1        144       AMB               percent_one_cell 0.47058824
## 2        145       AMB               percent_one_cell 0.22274882
## 3        150       AMB               percent_one_cell 0.92827004
## 4        146         l               percent_one_cell 0.32258065
## 5        149         l               percent_one_cell 0.47252747
## 6        151         l               percent_one_cell 0.43971631
## 7        147        xl               percent_one_cell 0.21951220
## 8        148        xl               percent_one_cell 0.56976744
## 9        152        xl               percent_one_cell 0.38396624
## 10       144       AMB               percent_two_cell 0.20261438
## 11       145       AMB               percent_two_cell 0.16587678
## 12       150       AMB               percent_two_cell 0.02109705
## 13       146         l               percent_two_cell 0.34274194
## 14       149         l               percent_two_cell 0.40659341
## 15       151         l               percent_two_cell 0.30141844
## 16       147        xl               percent_two_cell 0.20731707
## 17       148        xl               percent_two_cell 0.33720930
## 18       152        xl               percent_two_cell 0.40084388
## 19       144       AMB              percent_four_cell 0.20915033
## 20       145       AMB              percent_four_cell 0.17535545
## 21       150       AMB              percent_four_cell 0.03375527
## 22       146         l              percent_four_cell 0.26209677
## 23       149         l              percent_four_cell 0.09890110
## 24       151         l              percent_four_cell 0.20567376
## 25       147        xl              percent_four_cell 0.41463415
## 26       148        xl              percent_four_cell 0.08139535
## 27       152        xl              percent_four_cell 0.17299578
## 28       144       AMB percent_greater_than_four_cell 0.11764706
## 29       145       AMB percent_greater_than_four_cell 0.43601896
## 30       150       AMB percent_greater_than_four_cell 0.01687764
## 31       146         l percent_greater_than_four_cell 0.07258065
## 32       149         l percent_greater_than_four_cell 0.02197802
## 33       151         l percent_greater_than_four_cell 0.05319149
## 34       147        xl percent_greater_than_four_cell 0.15853659
## 35       148        xl percent_greater_than_four_cell 0.01162791
## 36       152        xl percent_greater_than_four_cell 0.04219409

Use beta regression model to analyze variation in the proportion of cells at each cleavage stage and treatment

Beta regression models are suggested for proportional data instead of t-tests and anovas because the data is often not normally distributed and homoscedastic. Here, we use a beta regression test (as described by S. S. Mangiafico in the RCompanion Handbook chapter, Beta Regression for Percent and Proportion Data).

model = betareg(proportion ~ cleavage_stage + treatment + cleavage_stage:treatment,
                data = gathered_cleavage_counts) #Make the model
joint_tests(model) #do the test
##  model term               df1 df2 F.ratio p.value
##  cleavage_stage             3 Inf  14.144 <.0001 
##  treatment                  2 Inf   0.055 0.9468 
##  cleavage_stage:treatment   6 Inf   1.902 0.0765
summary(model) #examine the results
## 
## Call:
## betareg(formula = proportion ~ cleavage_stage + treatment + cleavage_stage:treatment, 
##     data = gathered_cleavage_counts)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -2.8435 -0.8755  0.1297  0.6620  3.7332 
## 
## Coefficients (mean model with logit link):
##                                                          Estimate Std. Error
## (Intercept)                                               0.35537    0.36444
## cleavage_stagepercent_two_cell                           -2.23215    0.59653
## cleavage_stagepercent_four_cell                          -2.09342    0.58567
## cleavage_stagepercent_greater_than_four_cell             -2.11844    0.58760
## treatmentl                                               -0.68147    0.51506
## treatmentxl                                              -0.78987    0.51718
## cleavage_stagepercent_two_cell:treatmentl                 2.00324    0.79018
## cleavage_stagepercent_four_cell:treatmentl                1.09678    0.80330
## cleavage_stagepercent_greater_than_four_cell:treatmentl   0.06892    0.85542
## cleavage_stagepercent_two_cell:treatmentxl                1.95154    0.79456
## cleavage_stagepercent_four_cell:treatmentxl               1.27565    0.80205
## cleavage_stagepercent_greater_than_four_cell:treatmentxl  0.17514    0.85667
##                                                          z value Pr(>|z|)    
## (Intercept)                                                0.975 0.329517    
## cleavage_stagepercent_two_cell                            -3.742 0.000183 ***
## cleavage_stagepercent_four_cell                           -3.574 0.000351 ***
## cleavage_stagepercent_greater_than_four_cell              -3.605 0.000312 ***
## treatmentl                                                -1.323 0.185808    
## treatmentxl                                               -1.527 0.126691    
## cleavage_stagepercent_two_cell:treatmentl                  2.535 0.011239 *  
## cleavage_stagepercent_four_cell:treatmentl                 1.365 0.172144    
## cleavage_stagepercent_greater_than_four_cell:treatmentl    0.081 0.935789    
## cleavage_stagepercent_two_cell:treatmentxl                 2.456 0.014044 *  
## cleavage_stagepercent_four_cell:treatmentxl                1.590 0.111728    
## cleavage_stagepercent_greater_than_four_cell:treatmentxl   0.204 0.838012    
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)    9.219      2.154   4.281 1.86e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood:  29.5 on 13 Df
## Pseudo R-squared: 0.5646
## Number of iterations: 20 (BFGS) + 2 (Fisher scoring)
plot(model) #Diagnostic plots

In summary, we have significant differences in the proportion of cells at different cleavage stages within a treatment, but there is not significant variation in the proportion of cells at different cleavage stages between treatments.

Plot cleavage stage

Calculate stats for cleavage stage and treatment categories

##Just treatment
gathered_cleavage_counts$proportion <- (gathered_cleavage_counts$proportion)*100 #data had to be proportions for the statistical test but we want them to be plotted as percentages. We do that here.
gathered_cleavage_counts <- rename(gathered_cleavage_counts, percent=proportion)

#Mean for all treatments should be 25 because 100/4=25. Standard dev may vary.
clvgstats <- select(gathered_cleavage_counts, treatment:percent)
clvgstats.amb <- clvgstats %>% filter(treatment=="AMB") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.l <- gathered_cleavage_counts %>% filter(treatment=="l") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.xl <- gathered_cleavage_counts %>% filter(treatment=="xl") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent)) 

treatment.clvgstats.rownames <- as.factor(c("AMB", "l", "xl"))
treatment.clvgstats <- as.data.frame(bind_rows(clvgstats.amb, clvgstats.l, clvgstats.xl, .id=NULL))
treatment.clvgstats$cleavage_stage <- treatment.clvgstats.rownames
treatment.clvgstats <- treatment.clvgstats[c(5,1,2,3,4)]
show(treatment.clvgstats)
##   cleavage_stage mean       sd      min      max
## 1            AMB   25 25.79005 1.687764 92.82700
## 2              l   25 15.77375 2.197802 47.25275
## 3             xl   25 17.13955 1.162791 56.97674
##Just cleavage
#Cleavage stage means should differ
clvgstats.onecell <- clvgstats %>% filter(cleavage_stage=="percent_one_cell") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.twocell <- clvgstats %>% filter(cleavage_stage=="percent_two_cell") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.fourcell <- clvgstats %>% filter(cleavage_stage=="percent_four_cell") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.gfourcell <- clvgstats %>% filter(cleavage_stage=="percent_greater_than_four_cell") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))

stage.clvgstats.rownames <- as.factor(c("One Cell", "Two Cells", "Four Cells", "> Four Cells"))
stage.clvgstats <- as.data.frame(bind_rows(clvgstats.onecell, clvgstats.twocell, clvgstats.fourcell, clvgstats.gfourcell, .id=NULL))
stage.clvgstats$cleavage_stage <- stage.clvgstats.rownames
stage.clvgstats <- stage.clvgstats[c(5,1,2,3,4)]
show(stage.clvgstats)
##   cleavage_stage     mean       sd       min      max
## 1       One Cell 44.77419 21.48958 21.951220 92.82700
## 2      Two Cells 26.50791 12.63031  2.109705 40.65934
## 3     Four Cells 18.37731 11.23231  3.375527 41.46341
## 4   > Four Cells 10.34058 13.39930  1.162791 43.60190
##Treatment within cleavage
clvgstats.onecell.amb <- clvgstats %>% filter(cleavage_stage=="percent_one_cell", treatment=="AMB") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.onecell.l <- clvgstats %>% filter(cleavage_stage=="percent_one_cell", treatment=="l") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.onecell.xl <- clvgstats %>% filter(cleavage_stage=="percent_one_cell", treatment=="xl") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.twocell.amb <- clvgstats %>% filter(cleavage_stage=="percent_two_cell", treatment=="AMB") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.twocell.l <- clvgstats %>% filter(cleavage_stage=="percent_two_cell", treatment=="l") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.twocell.xl <- clvgstats %>% filter(cleavage_stage=="percent_two_cell", treatment=="xl") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.fourcell.amb <- clvgstats %>% filter(cleavage_stage=="percent_four_cell", treatment=="AMB") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.fourcell.l <- clvgstats %>% filter(cleavage_stage=="percent_four_cell", treatment=="l") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.fourcell.xl <- clvgstats %>% filter(cleavage_stage=="percent_four_cell", treatment=="xl") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.gfourcell.amb <- clvgstats %>% filter(cleavage_stage=="percent_greater_than_four_cell", treatment=="AMB") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.gfourcell.l <- clvgstats %>% filter(cleavage_stage=="percent_greater_than_four_cell", treatment=="l") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.gfourcell.xl <- clvgstats %>% filter(cleavage_stage=="percent_greater_than_four_cell", treatment=="xl") %>% 
  summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))

treatmentstage.clvgstats.rownames <- as.factor(c("OneCell_AMB", "OneCell_l", "OneCell_xl", "TwoCell_AMB", "TwoCell_l", "TwoCell_xl", "FourCell_AMB", "FourCell_l", "FourCell_xl", ">FourCell_AMB", ">FourCell_l", ">FourCell_xl"))
treatmentstage.clvgstats <- as.data.frame(bind_rows(clvgstats.onecell.amb, clvgstats.onecell.l, clvgstats.onecell.xl, clvgstats.twocell.amb, clvgstats.twocell.l, clvgstats.twocell.xl, clvgstats.fourcell.amb, clvgstats.fourcell.l, clvgstats.fourcell.xl, clvgstats.gfourcell.amb, clvgstats.gfourcell.l, clvgstats.gfourcell.xl, .id=NULL))
treatmentstage.clvgstats$cleavage_stage <- treatmentstage.clvgstats.rownames
treatmentstage.clvgstats <- treatmentstage.clvgstats[c(5,1,2,3,4)]
show(treatmentstage.clvgstats)
##    cleavage_stage      mean        sd       min       max
## 1     OneCell_AMB 54.053570 35.792392 22.274882 92.827004
## 2       OneCell_l 41.160814  7.882617 32.258065 47.252747
## 3      OneCell_xl 39.108196 17.523601 21.951220 56.976744
## 4     TwoCell_AMB 12.986273  9.596819  2.109705 20.261438
## 5       TwoCell_l 35.025126  5.298807 30.141844 40.659341
## 6      TwoCell_xl 31.512342  9.863567 20.731707 40.084388
## 7    FourCell_AMB 13.942035  9.305565  3.375527 20.915033
## 8      FourCell_l 18.889054  8.288223  9.890110 26.209677
## 9     FourCell_xl 22.300843 17.215683  8.139535 41.463415
## 10  >FourCell_AMB 19.018122 21.878246  1.687764 43.601896
## 11    >FourCell_l  4.925005  2.553052  2.197802  7.258065
## 12   >FourCell_xl  7.078620  7.751562  1.162791 15.853659

Plot mean_percent_cleavage

Calculate means at each stage

mpc <- normalized_cleavage_counts %>%
  group_by(treatment) %>% 
  summarize("One Cell" = mean(percent_one_cell),
            "Two Cells" = mean(percent_two_cell),
            "Four Cells" = mean(percent_four_cell),
            "> Four Cells" = mean(percent_greater_than_four_cell)) %>% 
  gather(cleavage_stage, mean_percent_cleavage, -treatment, na.rm = TRUE, factor_key = TRUE)
show(mpc)
## # A tibble: 12 x 3
##    treatment cleavage_stage mean_percent_cleavage
##    <fct>     <fct>                          <dbl>
##  1 AMB       One Cell                      0.541 
##  2 l         One Cell                      0.412 
##  3 xl        One Cell                      0.391 
##  4 AMB       Two Cells                     0.130 
##  5 l         Two Cells                     0.350 
##  6 xl        Two Cells                     0.315 
##  7 AMB       Four Cells                    0.139 
##  8 l         Four Cells                    0.189 
##  9 xl        Four Cells                    0.223 
## 10 AMB       > Four Cells                  0.190 
## 11 l         > Four Cells                  0.0493
## 12 xl        > Four Cells                  0.0708

Plot mean_percent_cleavage

final_cell_cleavage <- ggplot(mpc, aes(fill=cleavage_stage, y = mean_percent_cleavage, x=treatment)) + 
  geom_bar(position ="stack", stat = "identity") +
  xlab("pH Treatment") + ylab("Percent per treatment") + 
  scale_fill_brewer(name = "Number of Cells", labels = c("One", "Two", "Four", "> Four"), palette = "Paired") + #colour modification
  theme_bw() + #Set background color 
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background
  ggtitle("(a)") + #add a main title
  theme(plot.title = element_text(face = 'bold', 
                                  size = 12, 
                                  hjust = 0),
        legend.position = c(0.5, 1), #Place legend top middle of figure
        legend.direction = "horizontal", #Make legend list items in a row versus columns
        legend.background = element_rect(fill = NA), #Legend block no-fill
        legend.title = element_text(size = 10),
        legend.text = element_text(size = 8), legend.key.size = unit(0.5, "cm"),
  legend.key.width = unit(0.5,"cm"))
final_cell_cleavage

Growth through development

Set up workspace

Load libraries

library("tidyverse")
library("dplyr")
library("ggplot2")
library("gridExtra")

Import the data

raw_data <- read_csv("5-Developmental_progression_and_size/Input/size_metric_timeseries_data.csv")

raw_data$sample_ID <- as.factor(raw_data$sample_ID)
raw_data$time_point <- as.factor(raw_data$time_point)
raw_data$treatment <- as.factor(raw_data$treatment)
raw_data$TANK_NUM <- as.factor(raw_data$TANK_NUM)
raw_data$metric_id <- as.factor(raw_data$metric_id)

Check the type of each variable

str(raw_data)
## tibble [850 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ sample_ID : Factor w/ 34 levels "124","125","135",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ time_point: Factor w/ 7 levels "20190409_201",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ treatment : Factor w/ 4 levels "AMB","Gastrulation",..: NA NA NA NA NA NA NA NA NA NA ...
##  $ TANK_NUM  : Factor w/ 15 levels "1","2","3","4",..: NA NA NA NA NA NA NA NA NA NA ...
##  $ metric_id : Factor w/ 850 levels "124_egg_1","124_egg_10",..: 1 12 19 20 21 22 23 24 25 2 ...
##  $ width1    : num [1:850] 0.521 0.49 0.559 0.491 0.537 0.538 0.607 0.581 0.471 0.444 ...
##  $ width2    : num [1:850] 0.404 0.424 0.361 0.337 0.44 0.402 0.511 0.469 0.545 0.474 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   sample_ID = col_double(),
##   ..   time_point = col_character(),
##   ..   treatment = col_character(),
##   ..   TANK_NUM = col_double(),
##   ..   metric_id = col_character(),
##   ..   width1 = col_double(),
##   ..   width2 = col_double()
##   .. )

Examine volume at each life stage

Unfertilized Eggs (No treatment)

Calculate egg volume
egg_volume <- filter(raw_data, time_point == "Eggs") %>% 
  mutate(volume = ((4*pi*width1/6)*(width2/2)^2)) #Use equation for volume of ellipse
head(egg_volume)
## # A tibble: 6 x 8
##   sample_ID time_point treatment TANK_NUM metric_id width1 width2 volume
##   <fct>     <fct>      <fct>     <fct>    <fct>      <dbl>  <dbl>  <dbl>
## 1 124       Eggs       <NA>      <NA>     124_egg_1  0.521  0.404 0.0445
## 2 124       Eggs       <NA>      <NA>     124_egg_2  0.49   0.424 0.0461
## 3 124       Eggs       <NA>      <NA>     124_egg_3  0.559  0.361 0.0381
## 4 124       Eggs       <NA>      <NA>     124_egg_4  0.491  0.337 0.0292
## 5 124       Eggs       <NA>      <NA>     124_egg_5  0.537  0.44  0.0544
## 6 124       Eggs       <NA>      <NA>     124_egg_6  0.538  0.402 0.0455
Examine the data

Examine distribution of egg volume data

egg_volume_boxplot <- ggplot(egg_volume, aes(y = volume, x=treatment)) +
  ylab(expression("Volume " (mm^2))) + xlab("Treatment") + # Set x and y axis labels
  #expand_limits(y=c(0, 1.0)) +
  geom_jitter(colour = "cadetblue3", alpha = 0.5) +
  geom_boxplot(colour="cadetblue3", alpha=0) + 
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                      axis.title.x = element_blank(), # No axis title
                      axis.text.x = element_text(color = "White"), #White-out the x-axis label
                      axis.ticks.x = element_blank(), # No ticks on x-axis
                      plot.background=element_blank()) + #Set the plot background
  ggtitle("(b)") + #add a main title
  scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
  theme(plot.title = element_text(face = 'bold', 
                                  size = 12, 
                                  hjust = 0),
        legend.position = "bottom") #set title attributes
egg_volume_boxplot

#QQ-Plot
qqnorm(egg_volume$volume)
qqline(egg_volume$volume)

Examine egg volume for equal variance with residual regression.

#Create dataframe of predicted and residual values for each volume
egg_volume_res <- egg_volume #Copy dataset to manipulate for residual regression
egg_volume_fit <- lm(volume ~ sample_ID, data = egg_volume_res)  # Fit the model
egg_volume_res$predicted <- predict(egg_volume_fit)   # Save the predicted values
egg_volume_res$residuals <- residuals(egg_volume_fit) # Save the residual values
head(select(egg_volume_res, volume, predicted, residuals))
## # A tibble: 6 x 3
##   volume predicted residuals
##    <dbl>     <dbl>     <dbl>
## 1 0.0445    0.0495  -0.00494
## 2 0.0461    0.0495  -0.00334
## 3 0.0381    0.0495  -0.0113 
## 4 0.0292    0.0495  -0.0203 
## 5 0.0544    0.0495   0.00497
## 6 0.0455    0.0495  -0.00394
#Plot predicted (red) versus residuals (black)
egg_volume_res_scatter <- ggplot(egg_volume_res, aes(x = sample_ID, y = volume)) + # Plot volume versus treament as scatter
  geom_point() +
  geom_point(aes(y = predicted), color = "red") + # Add the predicted values
  ylab("Volume mm2")+ xlab("Sample ID") + #Label axes
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
egg_volume_res_scatter

#Histogram of residuals
egg_volume_res_hist <- egg_volume_res %>%
  ggplot(aes(x = residuals, fill = volume)) +
    geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background)
egg_volume_res_hist

Examine summary statistics for untreated eggs

untreatedegg <- select(egg_volume, time_point, volume)
untreatedegg.stats <- untreatedegg %>% 
  summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
notreatment <- as.factor(c("NA"))
untreatedegg.stats$treatment <- notreatment

untreatedegg.stats.rownames <- as.factor(c("Untreated_Eggs"))
untreatedegg.stats$timepoint <-untreatedegg.stats.rownames
untreatedegg.stats <- untreatedegg.stats[c(6,5,1,2,3,4)]
show(untreatedegg.stats)
## # A tibble: 1 x 6
##   timepoint      treatment   mean     sd     min    max
##   <fct>          <fct>      <dbl>  <dbl>   <dbl>  <dbl>
## 1 Untreated_Eggs NA        0.0396 0.0230 0.00150 0.0830
Final plot
final_egg_volume <- egg_volume_boxplot
final_egg_volume

Treated Eggs

Calculate treated eggs volume.
#Use the equation for volume of ellipse to calculate fertilized egg volume.
fert_volume <- filter(raw_data, time_point == "Fertilized_embryos") %>% 
  mutate(volume = ((4*pi*width1/6)*(width2/2)^2)) #Use equation for volume of ellipse
head(fert_volume)
## # A tibble: 6 x 8
##   sample_ID time_point    treatment TANK_NUM metric_id      width1 width2 volume
##   <fct>     <fct>         <fct>     <fct>    <fct>           <dbl>  <dbl>  <dbl>
## 1 135       Fertilized_e… AMB       1        135_fertilzed…  0.456  0.464 0.0514
## 2 135       Fertilized_e… AMB       1        135_fertilzed…  0.424  0.436 0.0422
## 3 135       Fertilized_e… AMB       1        135_fertilzed…  0.435  0.466 0.0495
## 4 135       Fertilized_e… AMB       1        135_fertilzed…  0.42   0.464 0.0473
## 5 135       Fertilized_e… AMB       1        135_fertilzed…  0.449  0.473 0.0526
## 6 135       Fertilized_e… AMB       1        135_fertilzed…  0.465  0.469 0.0536
Examine the data

Examine treated eggs volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.

#Boxplot colored by treatment
fert_volume_t_boxplot <- ggplot(fert_volume, aes(x = treatment, y = volume, color = treatment)) + 
  ylab(expression("Volume " (mm^2))) + xlab("Treatment") + #Label axes
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background
  ggtitle("(c)") + #add a main title
  scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
  theme(plot.title = element_text(face = 'bold', 
                                  size = 12, 
                                  hjust = 0),
        legend.position = "none") #set title attributes
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
fert_volume_t_boxplot

#Boxplot colored by TANK_NUM
fert_volume_s_boxplot <- ggplot(fert_volume, aes(x = treatment, y = volume, color = TANK_NUM)) + 
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
fert_volume_s_boxplot

#QQ-Plot
qqnorm(fert_volume$volume)
qqline(fert_volume$volume)

Examine treated eggs volume for equal variance with residual regression.

#Create dataframe of predicted and residual values for each volume
fert_volume_res <- fert_volume #Copy dataset to manipulate for residual regression
fert_volume_fit <- lm(volume ~ treatment, data = fert_volume_res)  # Fit the model
fert_volume_res$predicted <- predict(fert_volume_fit)   # Save the predicted values
fert_volume_res$residuals <- residuals(fert_volume_fit) # Save the residual values
head(select(fert_volume_res, treatment, volume, predicted, residuals))
## # A tibble: 6 x 4
##   treatment volume predicted residuals
##   <fct>      <dbl>     <dbl>     <dbl>
## 1 AMB       0.0514    0.0483   0.00306
## 2 AMB       0.0422    0.0483  -0.00614
## 3 AMB       0.0495    0.0483   0.00111
## 4 AMB       0.0473    0.0483  -0.00100
## 5 AMB       0.0526    0.0483   0.00425
## 6 AMB       0.0536    0.0483   0.00521
#Plot predicted (red) versus residuals (black)
fert_volume_res_scatter <- ggplot(fert_volume_res, aes(x = treatment, y = volume)) + # Plot volume versus treament as scatter
  geom_point() +
  geom_point(aes(y = predicted), color = "red") + # Add the predicted values
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
fert_volume_res_scatter

#Histogram of residuals
fert_volume_res_hist <- fert_volume_res %>%
  ggplot(aes(x = residuals, fill = volume)) +
    geom_histogram(binwidth = 0.0025, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background)
fert_volume_res_hist

#Histogram of residuals separated by treatment
fert_volume_res_t_hist <- fert_volume_res %>%
  ggplot(aes(x = residuals, fill = treatment)) +
    geom_histogram(binwidth = 0.0025, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background)
facet_wrap(facets = vars(treatment)) #Split 1 graph into three each showing a different treatment
fert_volume_res_t_hist

Statistical Analysis

Testing for tank effects: 1-Way ANOVA of treated eggs volume against tank nested within treatment.

#1-Way ANOVA of volume against tank nested within treatment.
fert_volume_nested_anova <- aov(volume ~ treatment/TANK_NUM, data = fert_volume)
summary(fert_volume_nested_anova)
##                     Df   Sum Sq   Mean Sq F value  Pr(>F)    
## treatment            2 0.000140 0.0000702    1.37   0.256    
## treatment:TANK_NUM   6 0.003343 0.0005571   10.88 1.4e-10 ***
## Residuals          216 0.011058 0.0000512                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Testing the effects of treatment on volume:

#1-Way ANOVA of volume against treatment
fert_volume_anova <- aov(volume ~ treatment, data = fert_volume)
summary(fert_volume_anova)
##              Df  Sum Sq   Mean Sq F value Pr(>F)
## treatment     2 0.00014 7.015e-05   1.082  0.341
## Residuals   222 0.01440 6.487e-05

Examine summary statistics for treated eggs

treatedegg <- select(fert_volume, time_point:treatment, volume)
treatedegg.amb <- treatedegg %>% filter(treatment=="AMB")
treatedegg.amb.stats <- treatedegg.amb %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
amb <- as.factor(c("AMB"))
treatedegg.amb.stats$treatment <- amb

treatedegg.l <- treatedegg %>% filter(treatment=="l")
treatedegg.l.stats <- treatedegg.l %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
l <- as.factor(c("l"))
treatedegg.l.stats$treatment <- l

treatedegg.xl <- treatedegg %>% filter(treatment=="xl")
treatedegg.xl.stats <- treatedegg.xl %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
xl <- as.factor(c("xl"))
treatedegg.xl.stats$treatment <- xl

treatedegg.stats.rownames <- as.factor(c("Treated_egg", "Treated_egg", "Treated_egg"))
treatedegg.stats <- as.data.frame(bind_rows(treatedegg.amb.stats, treatedegg.l.stats, treatedegg.xl.stats, .id=NULL))
treatedegg.stats$timepoint <-treatedegg.stats.rownames
treatedegg.stats <- treatedegg.stats[c(6,5,1,2,3,4)]
show(treatedegg.stats)
##     timepoint treatment       mean          sd        min        max
## 1 Treated_egg       AMB 0.04834626 0.008168517 0.02827965 0.07318527
## 2 Treated_egg         l 0.04762602 0.006896299 0.03435164 0.07040271
## 3 Treated_egg        xl 0.04954085 0.008961923 0.03259548 0.07502386
Final Plot
final_fert_volume <- fert_volume_t_boxplot +
  annotate("text", x = 1.2, y = 0.068, label = "a") +
  annotate("text", x = 2.2, y = 0.068, label = "a") +
  annotate("text", x = 3.2, y = 0.068, label = "a")
final_fert_volume

Gastrulation

Calculate Gastrula Volume
gas_volume <- filter(raw_data, time_point == "Gastrulation") %>% 
  mutate(volume = ((4*pi*width1/6)*(width2/2)^2))  #Use equation for volume of ellipse
head(gas_volume)
## # A tibble: 6 x 8
##   sample_ID time_point   treatment TANK_NUM metric_id       width1 width2 volume
##   <fct>     <fct>        <fct>     <fct>    <fct>            <dbl>  <dbl>  <dbl>
## 1 187       Gastrulation AMB       1        187_gastrulati…  0.614  0.456 0.0668
## 2 187       Gastrulation AMB       1        187_gastrulati…  0.571  0.492 0.0724
## 3 187       Gastrulation AMB       1        187_gastrulati…  0.581  0.489 0.0727
## 4 187       Gastrulation AMB       1        187_gastrulati…  0.572  0.382 0.0437
## 5 187       Gastrulation AMB       1        187_gastrulati…  0.701  0.37  0.0502
## 6 187       Gastrulation AMB       1        187_gastrulati…  0.551  0.511 0.0753
Examine the Data

Examine gastrula volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.

#Boxplot colored by treatment
gas_volume_t_boxplot <- ggplot(gas_volume, aes(x = treatment, y = volume, color = treatment)) +
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
show(gas_volume_t_boxplot)

#Boxplot colored by TANK_NUM
gas_volume_s_boxplot <- ggplot(gas_volume, aes(x = treatment, y = volume, color = TANK_NUM)) + 
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
show(gas_volume_s_boxplot)

#QQ-Plot
qqnorm(gas_volume$volume)
qqline(gas_volume$volume)

Transform the Data

The qq-plot shows that the data does not look normal. We will transform the data below using the ‘SQRT’ function.

gas_volume_sqrt <- mutate(gas_volume, sqrt_volume = sqrt(volume)) # Transform the distribution
head(gas_volume_sqrt)
## # A tibble: 6 x 9
##   sample_ID time_point treatment TANK_NUM metric_id width1 width2 volume
##   <fct>     <fct>      <fct>     <fct>    <fct>      <dbl>  <dbl>  <dbl>
## 1 187       Gastrulat… AMB       1        187_gast…  0.614  0.456 0.0668
## 2 187       Gastrulat… AMB       1        187_gast…  0.571  0.492 0.0724
## 3 187       Gastrulat… AMB       1        187_gast…  0.581  0.489 0.0727
## 4 187       Gastrulat… AMB       1        187_gast…  0.572  0.382 0.0437
## 5 187       Gastrulat… AMB       1        187_gast…  0.701  0.37  0.0502
## 6 187       Gastrulat… AMB       1        187_gast…  0.551  0.511 0.0753
## # … with 1 more variable: sqrt_volume <dbl>
Examine the Data Again

Examine sqrt-transformed gastrula volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.

#Boxplot colored by treatment
gas_sqrt_volume_t_boxplot <- ggplot(gas_volume_sqrt, aes(x = treatment, y = (sqrt_volume)^2, color = treatment)) + #Square the sqrt volume to make size visually comparable across life stages
   ylab(expression("Volume " (mm^2))) + xlab("Treatment") + #Label axes
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background
  ggtitle("(d)") + #add a main title
  scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
  theme(plot.title = element_text(face = 'bold', 
                                  size = 12, 
                                  hjust = 0),
        legend.position = "none") #set title attributes
show(gas_sqrt_volume_t_boxplot)

#Boxplot colored by TANK_NUM
gas_sqrt_volume_s_boxplot <- ggplot(gas_volume_sqrt, aes(x = treatment, y = sqrt_volume, color = TANK_NUM)) + 
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
show(gas_volume_s_boxplot)

#QQ-Plot
qqnorm(gas_volume_sqrt$sqrt_volume)
qqline(gas_volume_sqrt$sqrt_volume)

Examine sqrt-transformed gastrulation volume for equal variance with residual regression.

#Create dataframe of predicted and residual values for each volume
gas_sqrt_volume_res <- gas_volume_sqrt #Copy dataset to manipulate for residual regression
gas_sqrt_volume_fit <- lm(sqrt_volume ~ treatment, data = gas_sqrt_volume_res)  # Fit the model
gas_sqrt_volume_res$predicted <- predict(gas_sqrt_volume_fit)   # Save the predicted values
gas_sqrt_volume_res$residuals <- residuals(gas_sqrt_volume_fit) # Save the residual values
head(select(gas_sqrt_volume_res, treatment, sqrt_volume, predicted, residuals))
## # A tibble: 6 x 4
##   treatment sqrt_volume predicted residuals
##   <fct>           <dbl>     <dbl>     <dbl>
## 1 AMB             0.259     0.252   0.00680
## 2 AMB             0.269     0.252   0.0173 
## 3 AMB             0.270     0.252   0.0180 
## 4 AMB             0.209     0.252  -0.0427 
## 5 AMB             0.224     0.252  -0.0276 
## 6 AMB             0.274     0.252   0.0227
#Plot predicted (red) versus residuals (black)
gas_sqrt_volume_res_scatter <- ggplot(gas_sqrt_volume_res, aes(x = treatment, y = sqrt_volume)) + # Plot volume versus treament as scatter
  geom_point() +
  geom_point(aes(y = predicted), color = "red") + # Add the predicted values +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
gas_sqrt_volume_res_scatter

#Histogram of residuals
gas_sqrt_volume_res_hist <- gas_sqrt_volume_res %>%
  ggplot(aes(x = residuals, fill = volume)) +
    geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background)
gas_sqrt_volume_res_hist

#Histogram of residuals separated by treatment
gas_sqrt_volume_res_t_hist <- gas_sqrt_volume_res %>%
  ggplot(aes(x = residuals, fill = treatment)) +
    geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background)
facet_wrap(facets = vars(treatment)) #Split 1 graph into three each showing a different treatment
gas_sqrt_volume_res_t_hist

Statistical Analysis

Testing for tank effects: 1-Way ANOVA of sqrt-transformed planulae volume against tank nested within treatment.

#1-Way ANOVA of volume against tank nested within treatment.
gas_sqrt_volume_ts_anova <- aov(volume ~ treatment/TANK_NUM, data = gas_volume_sqrt)
summary(gas_sqrt_volume_ts_anova)
##                     Df   Sum Sq   Mean Sq F value   Pr(>F)    
## treatment            2 0.002446 0.0012231   8.490 0.000307 ***
## treatment:TANK_NUM   4 0.000510 0.0001275   0.885 0.474327    
## Residuals          168 0.024204 0.0001441                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Testing the effects of treatment on volume:

#1-Way ANOVA of volume against treatment.
gas_sqrt_volume_txs_anova <- aov(sqrt_volume ~ treatment, data = gas_volume_sqrt)
summary(gas_sqrt_volume_txs_anova)
##              Df  Sum Sq  Mean Sq F value   Pr(>F)    
## treatment     2 0.01002 0.005012   8.535 0.000292 ***
## Residuals   172 0.10101 0.000587                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(gas_sqrt_volume_txs_anova)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sqrt_volume ~ treatment, data = gas_volume_sqrt)
## 
## $treatment
##                diff          lwr          upr     p adj
## l-AMB  -0.017399962 -0.027859991 -0.006939933 0.0003564
## xl-AMB -0.012094266 -0.022554295 -0.001634237 0.0188790
## xl-l    0.005305696 -0.006152691  0.016764084 0.5186419

Examine summary statistics for gastrula

gas <- select(gas_volume, time_point:treatment, volume)

gas.amb <- gas %>% filter(treatment=="AMB")
gas.amb.stats <- gas.amb %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
gas.amb.stats$treatment <- amb

gas.l <- gas %>% filter(treatment=="l")
gas.l.stats <- gas.l %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
gas.l.stats$treatment <- l

gas.xl <- gas %>% filter(treatment=="xl")
gas.xl.stats <- gas.xl %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
gas.xl.stats$treatment <- xl

gas.stats.rownames <- as.factor(c("Gastrulation", "Gastrulation", "Gastrulation"))
gas.stats <- as.data.frame(bind_rows(gas.amb.stats, gas.l.stats, gas.xl.stats, .id=NULL))
gas.stats$timepoint <-gas.stats.rownames
gas.stats <- gas.stats[c(6,5,1,2,3,4)]
show(gas.stats)
##      timepoint treatment       mean         sd        min        max
## 1 Gastrulation       AMB 0.06404722 0.01338058 0.03962596 0.11147668
## 2 Gastrulation         l 0.05556756 0.01218978 0.03560216 0.08503743
## 3 Gastrulation        xl 0.05781054 0.00924022 0.03422066 0.07561363
Final plot
final_gas_volume <- gas_sqrt_volume_t_boxplot +
  annotate("text", x = 1.2, y = 0.1, label = "a") +
  annotate("text", x = 2.2, y = 0.1, label = "b") +
  annotate("text", x = 3.2, y = 0.1, label = "b")
final_gas_volume

Planulae

Calculate planulae volume
planulae_volume <- filter(raw_data, time_point == "Planulae") %>% 
  mutate(volume = ((4*pi*width1/6)*(width2/2)^2))  #Use equation for volume of ellipse
head(planulae_volume)
## # A tibble: 6 x 8
##   sample_ID time_point treatment TANK_NUM metric_id      width1 width2 volume
##   <fct>     <fct>      <fct>     <fct>    <fct>           <dbl>  <dbl>  <dbl>
## 1 362       Planulae   AMB       10       362_planulae_1  0.851  0.441 0.0867
## 2 362       Planulae   AMB       10       362_planulae_2  0.817  0.444 0.0843
## 3 362       Planulae   AMB       10       362_planulae_3  0.684  0.388 0.0539
## 4 362       Planulae   AMB       10       362_planulae_4  0.796  0.515 0.111 
## 5 362       Planulae   AMB       10       362_planulae_5  0.75   0.452 0.0802
## 6 362       Planulae   AMB       10       362_planulae_6  0.892  0.405 0.0766
Examine the Data

Examine planula volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.

#Boxplot colored by treatment
planulae_volume_t_boxplot <- ggplot(planulae_volume, aes(x = treatment, y = volume, color = treatment)) +
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
show(planulae_volume_t_boxplot)

#Boxplot colored by TANK_NUM
planulae_volume_s_boxplot <- ggplot(planulae_volume, aes(x = treatment, y = volume, color = TANK_NUM)) + 
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
show(planulae_volume_s_boxplot)

#QQ-Plot
qqnorm(planulae_volume$volume)
qqline(planulae_volume$volume)

Transform the Data

The qq-plot shows that the data does not look normal. We will transform the data below using the ‘SQRT’ function.

planulae_volume_sqrt <- mutate(planulae_volume, sqrt_volume = sqrt(volume)) # Transform the distribution
head(planulae_volume_sqrt)
## # A tibble: 6 x 9
##   sample_ID time_point treatment TANK_NUM metric_id width1 width2 volume
##   <fct>     <fct>      <fct>     <fct>    <fct>      <dbl>  <dbl>  <dbl>
## 1 362       Planulae   AMB       10       362_plan…  0.851  0.441 0.0867
## 2 362       Planulae   AMB       10       362_plan…  0.817  0.444 0.0843
## 3 362       Planulae   AMB       10       362_plan…  0.684  0.388 0.0539
## 4 362       Planulae   AMB       10       362_plan…  0.796  0.515 0.111 
## 5 362       Planulae   AMB       10       362_plan…  0.75   0.452 0.0802
## 6 362       Planulae   AMB       10       362_plan…  0.892  0.405 0.0766
## # … with 1 more variable: sqrt_volume <dbl>
Examine the Data Again

Examine sqrt-transformed planula volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.

#Boxplot colored by treatment
planulae_sqrt_volume_t_boxplot <- ggplot(planulae_volume_sqrt, aes(x = treatment, y = (sqrt_volume)^2, color = treatment)) + #Square the sqrt volume to make size visually comparable across life stages
  ylab(expression("Volume " (mm^2))) + xlab("Treatment") + #Label axes
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background
  ggtitle("(e)") + #add a main title
  scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
  theme(plot.title = element_text(face = 'bold', 
                                  size = 12, 
                                  hjust = 0), 
        legend.position = ("none")) #set title attributes
planulae_sqrt_volume_t_boxplot

#Boxplot colored by TANK_NUM
planulae_sqrt_volume_s_boxplot <- ggplot(planulae_volume_sqrt, aes(x = treatment, y = sqrt_volume, color = TANK_NUM)) + 
  ylab("Volume mm2")+ xlab("Treatment") + #Label axes
  geom_jitter(aes(alpha = 0.3)) +
  geom_boxplot(alpha = 0) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
show(planulae_volume_s_boxplot)

#QQ-Plot
qqnorm(planulae_volume_sqrt$sqrt_volume)
qqline(planulae_volume_sqrt$sqrt_volume)

Examine sqrt-transformed planulae volume for equal variance with residual regression.

#Create dataframe of predicted and residual values for each volume
planulae_sqrt_volume_res <- planulae_volume_sqrt #Copy dataset to manipulate for residual regression
planulae_sqrt_volume_fit <- lm(sqrt_volume ~ treatment, data = planulae_sqrt_volume_res)  # Fit the model
planulae_sqrt_volume_res$predicted <- predict(planulae_sqrt_volume_fit)   # Save the predicted values
planulae_sqrt_volume_res$residuals <- residuals(planulae_sqrt_volume_fit) # Save the residual values
head(select(planulae_sqrt_volume_res, treatment, sqrt_volume, predicted, residuals))
## # A tibble: 6 x 4
##   treatment sqrt_volume predicted residuals
##   <fct>           <dbl>     <dbl>     <dbl>
## 1 AMB             0.294     0.242   0.0526 
## 2 AMB             0.290     0.242   0.0486 
## 3 AMB             0.232     0.242  -0.00960
## 4 AMB             0.332     0.242   0.0907 
## 5 AMB             0.283     0.242   0.0415 
## 6 AMB             0.277     0.242   0.0350
#Plot predicted (red) versus residuals (black)
planulae_sqrt_volume_res_scatter <- ggplot(planulae_sqrt_volume_res, aes(x = treatment, y = sqrt_volume)) + # Plot volume versus treament as scatter
  geom_point() +
  geom_point(aes(y = predicted), color = "red") + # Add the predicted values +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background
planulae_sqrt_volume_res_scatter

#Histogram of residuals
planulae_sqrt_volume_res_hist <- planulae_sqrt_volume_res %>%
  ggplot(aes(x = residuals, fill = volume)) +
    geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background)
planulae_sqrt_volume_res_hist

#Histogram of residuals separated by treatment
planulae_sqrt_volume_res_t_hist <- planulae_sqrt_volume_res %>%
  ggplot(aes(x = residuals, fill = treatment)) +
    geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
    labs(fill="") + # Plot volume versus treament as scatter
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) + #Set the plot background)
facet_wrap(facets = vars(treatment)) #Split 1 graph into three each showing a different treatment
planulae_sqrt_volume_res_t_hist

Statistical Analysis

Testing for tank effects: 1-Way ANOVA of sqrt-transformed planulae volume against treatment. 1-Way ANOVA of volume against tank nested within treatment.

#1-Way ANOVA of volume against tank nested within treatment.
planulae_sqrt_volume_ts_anova <- aov(volume ~ treatment/TANK_NUM, data = planulae_volume_sqrt)
summary(planulae_sqrt_volume_ts_anova)
##                     Df  Sum Sq   Mean Sq F value   Pr(>F)    
## treatment            2 0.00506 0.0025286   10.30 6.57e-05 ***
## treatment:TANK_NUM   3 0.00144 0.0004809    1.96    0.123    
## Residuals          144 0.03534 0.0002454                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Testing the effects of treatment on volume:

#1-Way ANOVA of volume against treatment
planulae_sqrt_volume_txs_anova <- aov(sqrt_volume ~ treatment, data = planulae_volume_sqrt)
summary(planulae_sqrt_volume_txs_anova)
##              Df  Sum Sq  Mean Sq F value   Pr(>F)    
## treatment     2 0.02277 0.011387   9.716 0.000109 ***
## Residuals   147 0.17227 0.001172                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(planulae_sqrt_volume_txs_anova)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sqrt_volume ~ treatment, data = planulae_volume_sqrt)
## 
## $treatment
##                diff          lwr          upr     p adj
## l-AMB  -0.029414826 -0.045625704 -0.013203947 0.0000928
## xl-AMB -0.020562553 -0.036773432 -0.004351675 0.0087664
## xl-l    0.008852273 -0.007358606  0.025063151 0.4014180

Examine summary statistics for planula

plan <- select(planulae_volume, time_point:treatment, volume)

plan.amb <- plan %>% filter(treatment=="AMB")
plan.amb.stats <- plan.amb %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
plan.amb.stats$treatment <- amb

plan.l <- plan %>% filter(treatment=="l")
plan.l.stats <- plan.l %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
plan.l.stats$treatment <- l

plan.xl <- plan %>% filter(treatment=="xl")
plan.xl.stats <- plan.xl %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
plan.xl.stats$treatment <- xl

plan.stats.rownames <- as.factor(c("Planulae", "Planulae", "Planulae"))
plan.stats <- as.data.frame(bind_rows(plan.amb.stats, plan.l.stats, plan.xl.stats, .id=NULL))
plan.stats$timepoint <-plan.stats.rownames
plan.stats <- plan.stats[c(6,5,1,2,3,4)]
show(plan.stats)
##   timepoint treatment       mean         sd        min        max
## 1  Planulae       AMB 0.05984008 0.01878055 0.02089216 0.11329448
## 2  Planulae         l 0.04598178 0.01271031 0.02244043 0.07760985
## 3  Planulae        xl 0.05013986 0.01537470 0.01709592 0.08994879
Final table
volume.stats <- as.data.frame(bind_rows(untreatedegg.stats, treatedegg.stats, gas.stats, plan.stats, .id=NULL))
volume.stats <- volume.stats[c(6,1,2,3,4,5)]
show(volume.stats)
##           max      timepoint treatment       mean          sd         min
## 1  0.08299064 Untreated_Eggs        NA 0.03958601 0.023022591 0.001504261
## 2  0.07318527    Treated_egg       AMB 0.04834626 0.008168517 0.028279649
## 3  0.07040271    Treated_egg         l 0.04762602 0.006896299 0.034351644
## 4  0.07502386    Treated_egg        xl 0.04954085 0.008961923 0.032595475
## 5  0.11147668   Gastrulation       AMB 0.06404722 0.013380581 0.039625961
## 6  0.08503743   Gastrulation         l 0.05556756 0.012189782 0.035602165
## 7  0.07561363   Gastrulation        xl 0.05781054 0.009240220 0.034220665
## 8  0.11329448       Planulae       AMB 0.05984008 0.018780552 0.020892161
## 9  0.07760985       Planulae         l 0.04598178 0.012710312 0.022440434
## 10 0.08994879       Planulae        xl 0.05013986 0.015374697 0.017095919
Final plot
final_plan_volume <- planulae_sqrt_volume_t_boxplot +
  annotate("text", x = 1.2, y = 0.105, label = "a") +
  annotate("text", x = 2.2, y = 0.105, label = "b") +
  annotate("text", x = 3.2, y = 0.105, label = "b")
final_plan_volume

fig_2 <- grid.arrange(final_cell_cleavage, arrangeGrob(final_egg_volume, final_fert_volume, final_gas_volume, final_plan_volume, ncol = 2, nrow = 2), ncol = 2, nrow = 1, widths = c(3,7), clip = "off")

ggsave("5-Developmental_progression_and_size/Output/fig2_embryo_development.pdf", fig_2, width = 16, height = 8, units = c("in"))